from gxl_ai_utils.utils import utils_file
import sys

# Positional command-line arguments: the input JSONL path, then the output path.
jsonl_file = sys.argv[1]
text_file = sys.argv[2]

# (alternate field name, canonical field name) pairs. An alternate field is
# copied onto its canonical name only when the canonical one is absent.
_FIELD_ALIASES = (('id', 'key'), ('text', 'txt'))

# Mapping from each record's 'key' to its 'txt'; a later record with the same
# key replaces the earlier entry.
text_dict = {}
dict_list = utils_file.load_dict_list_from_jsonl(jsonl_file)
for record in dict_list:
    # Normalise field names in place so the checks below only need to look
    # at 'key' and 'txt'.
    for alias, canonical in _FIELD_ALIASES:
        if alias in record and canonical not in record:
            record[canonical] = record[alias]

    # Report and skip records that still lack either required field.
    if not ('key' in record and 'txt' in record):
        print(f"key or txt not in dict_i: {record}")
        continue

    text_dict[record['key']] = record['txt']

# Hand the collected mapping to the project helper that writes it to text_file.
utils_file.write_dict_to_scp(text_dict, text_file)